import json
import csv

# Directory prefixes shared by every path below.
_DATA_DIR = 'data/people-relation/'
_ORIGIN_DIR = _DATA_DIR + 'origin/'

# Files under origin/: the train file and the label dictionary are opened for
# reading further down; the test path is not referenced in the visible part
# of this script.
train_path = _ORIGIN_DIR + 'train.txt'
test_path = _ORIGIN_DIR + 'test.txt'
file_path_label = _ORIGIN_DIR + 'rel_dict.json'

# Files directly under the dataset directory: the train TSV and the label list
# are opened for writing further down; the test TSV path is not referenced in
# the visible part of this script.
train_path_new = _DATA_DIR + 'train.tsv'
test_path_new = _DATA_DIR + 'test.tsv'
file_path_label_new = _DATA_DIR + 'label.txt'


# Empty dict; not referenced anywhere in the visible part of this script.
label_to_id = {}
# Starts as an empty string.
# NOTE(review): this name shadows the builtin `str` for the rest of the
# module, so `str(...)` cannot be called below this line.
str = ''
# Convert the label file (done by the top-level code after process_text).

def process_text(text):
    """Fill in the masked entities of one sample and mark their boundaries.

    ``text`` has the form ``per1$per2$doc``. Inside ``doc`` the two entity
    names are masked with ``#`` characters. ``per1`` is written over the
    ``len(per1)`` characters starting at the first ``#``; ``per2`` is written
    over the ``len(per2)`` characters ending at the last ``#``. Afterwards
    four single-character markers are inserted:

    * ``^`` right before and ``&`` right after entity 1,
    * ``*`` right before and ``=`` right after entity 2.

    The marker offsets assume that the entity 1 span lies entirely before the
    entity 2 span.

    The marked sentence is printed to stdout and returned.

    Raises:
        ValueError: if ``text`` contains fewer than two ``$`` separators, if
            ``doc`` contains no ``#`` placeholder, or if ``per2`` is longer
            than the part of ``doc`` that ends at the last ``#``.
        IndexError: if ``per1`` would extend past the end of ``doc``.
    """
    # maxsplit=2 keeps any '$' that occurs inside the sentence itself instead
    # of failing the three-way unpacking.
    per1, per2, doc = text.split('$', 2)

    # Searching forward, the first '#' is where entity 1 starts.
    ent1_start = doc.find('#')
    if ent1_start == -1:
        # Without this check the -1 from find()/rfind() would be used as a
        # negative list index and silently overwrite unrelated characters.
        raise ValueError(
            "no '#' placeholder found in sample: {!r}".format(text))
    ent1_end = ent1_start + len(per1) - 1

    # Searching backward, the first '#' found is where entity 2 ends.
    ent2_end = doc.rfind('#')
    ent2_start = ent2_end - len(per2) + 1
    if ent2_start < 0:
        # Same negative-index hazard as above, this time for entity 2.
        raise ValueError(
            "second entity {!r} does not fit before the last '#' "
            "in sample: {!r}".format(per2, text))

    chars = list(doc)

    # Overwrite the placeholder characters with the entity names.
    for offset, char in enumerate(per1):
        chars[ent1_start + offset] = char
    for offset, char in enumerate(per2):
        chars[ent2_start + offset] = char

    # Insert the markers left to right; every earlier insertion shifts the
    # later positions by one, hence the growing constants.
    chars.insert(ent1_start, '^')      # start of entity 1
    chars.insert(ent1_end + 2, '&')    # end of entity 1 (shifted by 1)
    chars.insert(ent2_start + 2, '*')  # start of entity 2 (shifted by 2)
    chars.insert(ent2_end + 4, '=')    # end of entity 2 (shifted by 3)

    doc = "".join(chars)
    print(doc)
    return doc


# Load the label dictionary (label name -> value stored in the JSON file).
# json.load parses straight from the file object, so no intermediate text
# buffer is needed.
with open(file_path_label, 'r', encoding='utf-8') as f:
    label_name_to_i = json.load(f)

# Write one label name per line, in the dictionary's iteration order, and
# build the reverse mapping (stored value -> label name) along the way.
label_i_to_name = {}
with open(file_path_label_new, 'w', encoding='utf-8') as f:
    for label_name, label_index in label_name_to_i.items():
        f.write(label_name + '\n')
        label_i_to_name[label_index] = label_name

print(label_i_to_name)

# Convert the training file to TSV. Each input line is split on whitespace:
# the first field is a numeric label looked up in label_i_to_name, and the
# remaining fields are concatenated (dropping the whitespace between them)
# and passed to process_text. Each output line is "<label name>\t<text>".
# The output file is opened in a `with` block so it is flushed and closed
# even if a line fails to convert.
with open(train_path_new, 'w', encoding='utf-8') as f_out:
    with open(train_path, 'r', encoding='utf-8') as f_read:
        content = [raw_line.strip() for raw_line in f_read]
    for line in content:
        parts = line.split()
        if not parts:
            # Blank line: there is no label field to read, so skip it.
            continue
        label = label_i_to_name[int(parts[0])]
        text = process_text(''.join(parts[1:]))
        f_out.write(label + '\t' + text + '\n')
